In [1]:
import pandas as pd
import altair as alt
In [2]:
url = 'https://raw.githubusercontent.com/UIUC-iSchool-DataViz/is445_data/main/licenses_fall2022.csv'
data = pd.read_csv(url)
data.head() # Display the first few rows to understand the data
Out[2]:
| _id | License Type | Description | License Number | License Status | Business | Title | First Name | Middle | Last Name | ... | Specialty/Qualifier | Controlled Substance Schedule | Delegated Controlled Substance Schedule | Ever Disciplined | LastModifiedDate | Case Number | Action | Discipline Start Date | Discipline End Date | Discipline Reason | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1189509 | DETECTIVE BOARD | PERMANENT EMPLOYEE REGISTRATION | 129446286 | NOT RENEWED | N | NaN | EILEEN | NaN | SANTACRUZ | ... | NaN | NaN | NaN | N | 03/18/2022 | NaN | NaN | NaN | NaN | NaN |
| 1 | 801037 | DETECTIVE BOARD | FIREARM CONTROL CARD | 229030294.0 | NOT RENEWED | N | NaN | DAGMAR | J | NORDLUND | ... | NaN | NaN | NaN | N | 08/16/2006 | NaN | NaN | NaN | NaN | NaN |
| 2 | 365129 | COSMO | LICENSED COSMETOLOGIST | 11053076.0 | NOT RENEWED | N | NaN | RADOJE | NaN | ZELENOVIC | ... | NaN | NaN | NaN | N | 05/26/2006 | NaN | NaN | NaN | NaN | NaN |
| 3 | 595427 | COSMO | LICENSED COSMETOLOGIST | 11295645.0 | ACTIVE | N | NaN | BECKY SUE | L | BURROUGHS | ... | NaN | NaN | NaN | N | 11/12/2021 | NaN | NaN | NaN | NaN | NaN |
| 4 | 653668 | COSMO | LICENSED NAIL TECHNICIAN | 169006247 | NOT RENEWED | N | NaN | BILL G | L | LETNER | ... | NaN | NaN | NaN | N | 05/30/2006 | NaN | NaN | NaN | NaN | NaN |
5 rows × 31 columns
In [3]:
# Checking for missing values
print(data.isnull().sum())
data = data.dropna(subset=['License Status'])
data = data.drop(columns=['Middle', 'Suffix', 'Title']) # Example of dropping columns
# Convert dates if needed
data['LastModifiedDate'] = pd.to_datetime(data['LastModifiedDate'])
_id 0 License Type 0 Description 0 License Number 60 License Status 0 Business 0 Title 9890 First Name 395 Middle 6378 Last Name 395 Prefix 9997 Suffix 9590 Business Name 0 BusinessDBA 9885 Original Issue Date 5 Effective Date 792 Expiration Date 500 City 11 State 0 Zip 71 County 411 Specialty/Qualifier 9692 Controlled Substance Schedule 9791 Delegated Controlled Substance Schedule 10000 Ever Disciplined 0 LastModifiedDate 0 Case Number 9657 Action 9658 Discipline Start Date 9657 Discipline End Date 9861 Discipline Reason 9713 dtype: int64
In [4]:
# Basic bar chart showing the count of licenses by License Type
alt.data_transformers.disable_max_rows()
chart1 = alt.Chart(data).mark_bar().encode(
x=alt.X('License Type:N', title='License Type', sort='-y'),
y=alt.Y('count():Q', title='Count of Licenses'),
color='License Type:N'
).properties(
title='Count of Licenses by Type',
width=600,
height=400
).configure_axis(
labelAngle=-45
)
chart1
Out[4]:
In [16]:
filtered_data = data[data['License Status'].isin(['ACTIVE', 'EXPIRED', 'NOT RENEWED'])] # Example statuses to focus on
filtered_chart = alt.Chart(filtered_data).mark_line(point=True).encode(
x=alt.X('yearmonth(LastModifiedDate):T', title='Year-Month'),
y=alt.Y('count():Q', title='Count of Licenses'),
color='License Status:N',
tooltip=['License Status', 'count()']
).properties(
title='Filtered Count of Licenses by Status',
width=700,
height=400
).interactive()
filtered_chart
Out[16]:
In [ ]: